## Figure A1

#directory
#set working directory to the path PvP_Replication
#setwd("~/PvP_Replication")

#packages
library(tidyverse) #for data cleaning and manipulation
library(kbal) #for calculating kernel balancing weights
library(cobalt) #plot balance

#load main dataset
pvp_data <- read.csv("PvP_data/PvP_data_main.csv")


#transform independent variable
#create binary (hectares > 100) cultivation variable
pvp_data <- pvp_data %>% 
  mutate(cultbinary = ifelse(maxcult > 100, 1, 0))

#balancing model

#transform selected covariates
#create variable for highways per square km, percent of population in rural zones, log population
pvp_data <- pvp_data %>% 
  mutate(hwycover = (hwylengthkm/sqkm), ruralpct = (ruralpop14/pop14)*100, logpop = log(pop14))

#list of covariates for kernel balancing
covariate_list <- c("lit_ratio", "electric_ratio", "logpop", "ruralpct", "elev_stdev", "hwycover", "distCali", "distBogota", "distMedellin")

#subset data to FARC municipalities for main anaylses
pvp_farc_subset <- pvp_data %>% filter(farc_presence == 1)

#note that there are two cases missing covariates
pvp_farc_subset %>% 
  dplyr::select(municode, all_of(covariate_list), dissident_presence, cultbinary) %>% 
  filter(!complete.cases(.)) %>% 
  count()

#drop the two incomplete cases
pvp_farc_subset <- pvp_farc_subset %>% 
  filter(complete.cases(pvp_farc_subset[ , c(covariate_list, 'dissident_presence', 'cultbinary')]))


#calculate weights using kernel balancing
pvp_farc_subset$kbalweights <- kbal(allx = pvp_farc_subset[,covariate_list],
                                    treatment = pvp_farc_subset$cultbinary,
                                    fullSVD = TRUE, meanfirst = F)$w

#create custom names for plotting covariates
varname_df <- data.frame(old = c("lit_ratio", "elev_stdev", "hwycover", "logpop", "electric_ratio", "ruralpct", "distCali", "distBogota", "distMedellin", "elev_stdev * ruralpct"), new = c("Literacy", "Rough Terrain", "Roads", "Population", "Rural", "Electricity",  "Cali Dist.", "Bogota Dist.", "Medellin Dist.", "*Rural x Terrain"))

#plot balance using cobalt
balance_plot <- cobalt::love.plot(x = cultbinary ~ lit_ratio + elev_stdev + hwycover + logpop + electric_ratio + ruralpct + distCali + distBogota + distMedellin + elev_stdev * ruralpct, 
                  treat = "cultbinary", data = pvp_farc_subset, var.names = varname_df, weights = "kbalweights",
                  title = "", s.d.denom = "treated", sample.names = c("Unadjusted", "Adjusted"), 
                  limits = list(m = c(-3, 1))) + 
                  xlab("Std. Mean Differences: Cocaine vs Non-Cocaine Municipalities") + 
                  theme(axis.text.y = element_text(face=c("bold")))

#save plot to folder
ggsave(plot = balance_plot, filename="PvP_plots/balance_plot.pdf", width = 6,
       height = 4, units = "in")
